from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Read the source text file as UTF-8 and wrap it in LangChain Document objects.
loader = TextLoader(
    "/root/project/Code/sshcode/lc/character.txt",
    encoding="utf-8",
)
docs = loader.load()

print(docs)

# Separators are listed from coarsest to finest: blank line, newline, the
# full-width sentence/clause punctuation marks, a space, and finally the empty
# string as a last-resort fallback.
splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "。", "！", "？", "；", "，", " ", ""],
    chunk_size=200,
    chunk_overlap=40,
)

new_docs = splitter.split_documents(docs)
print(new_docs)

# Preview up to the first four chunks. Slicing (rather than indexing 0..3
# directly) avoids an IndexError when the text yields fewer than four chunks.
for chunk in new_docs[:4]:
    print(chunk.page_content)